# Load the KBO1 data set and take a first look at it.
# NOTE(review): the working directory is hard-coded to a machine-specific
# path, so the script only runs where this directory exists.
setwd("F:\\1529407184_Assignment")

# load() restores the saved objects into the workspace and returns only their
# *names*, so its return value must not be treated as the data itself.
loaded_objects <- load("KBO1.rda")
stopifnot("KBO1" %in% loaded_objects)
data <- KBO1

str(data)

# Log-transformed response, stored as an extra column of the data frame.
data$ln_y <- log(data$Y)

# scatterplotMatrix() is provided by the car package, which this script never
# attaches; calling it through its namespace keeps the plot from depending on
# whatever happens to be loaded in the current session.
car::scatterplotMatrix(~ X1 + X2 + X3 + X4 + X5 + X6 + X7, data = data)

# Install magrittr only when it is missing, so that re-running the script
# does not re-download the package (or fail when there is no network access).
if (!requireNamespace("magrittr", quietly = TRUE)) {
  install.packages("magrittr")
}
library(magrittr)

# Predictor matrix, response vector and penalty grid for glmnet.
# Columns 9 and 10 of the expanded model matrix are dropped by position.
# NOTE(review): which variables those positions correspond to depends on the
# column order of `data` -- confirm. Column 1 (the "(Intercept)" column that
# model.matrix() adds) is kept.
x <- model.matrix(Y ~ ., data = data)[, -c(9, 10)]
y <- data[["Y"]]
# 100 penalties, log-spaced from 10^10 down to 10^-2.
lambda <- 10^seq(from = 10, to = -2, length.out = 100)

# Split the rows into a training half and a test half.
library(glmnet)

set.seed(489)
# Draw half of the row indices at random for training.
train <- sample(seq_len(nrow(x)), size = nrow(x) / 2)
# Negative indices: "every row that is not in train".
test <- -train
ytest <- y[test]

#OLS
# Least-squares fit on the full data. `ln_y` is log(Y), i.e. a transform of
# the response itself, so it is removed from the predictors that `.` expands
# to; otherwise the model is fitted on leaked response information.
datalm <- lm(Y ~ . - ln_y, data = data)
coef(datalm)

#ridge
ridge.mod <- glmnet(x, y, alpha = 0, lambda = lambda)
# s = 0 is not on the fitted lambda grid (its smallest value is 10^-2), so
# exact = TRUE makes glmnet refit the model; the refit needs the original x
# and y to be passed along. The predict() generic is used instead of calling
# the S3 method directly.
predict(ridge.mod, s = 0, exact = TRUE, type = "coefficients",
        x = x, y = y)[1:8, ]


# Refit both models on the training half only.
# `ln_y` is log(Y); it is excluded so that OLS does not get the response as a
# predictor, which would make its test MSE incomparable with ridge.
datalm <- lm(Y ~ . - ln_y, data = data, subset = train)
ridge.mod <- glmnet(x[train, ], y[train], alpha = 0, lambda = lambda)
# Choose the ridge penalty by cross-validation. No `lambda` is passed here,
# so cv.glmnet() searches its own default sequence, not the grid above.
cv.out <- cv.glmnet(x[train, ], y[train], alpha = 0)

bestlam <- cv.out$lambda.min


# Predict the held-out rows with the ridge fit (at the cross-validated
# penalty) and with the OLS fit.
ridge.pred <- predict(ridge.mod, newx = x[test, ], s = bestlam)
s.pred <- predict(datalm, newdata = data[test, ])
# Test-set mean squared error: OLS first, then ridge.
mean((ytest - s.pred)^2)

mean((ytest - ridge.pred)^2)

# Inspect the ridge coefficients at the cross-validated penalty.
# `out` (a ridge fit on glmnet's default lambda path) is not referenced again
# in the visible part of the script; the coefficients come from `ridge.mod`.
out <- glmnet(x = x[train, ], y = y[train], alpha = 0)
predict(ridge.mod, s = bestlam, type = "coefficients")[1:6, ]

# Lasso (alpha = 1) on the training half, evaluated on the test half.
lasso.mod <- glmnet(x[train, ], y[train], alpha = 1, lambda = lambda)
# `bestlam` was selected by cross-validating the ridge penalty (alpha = 0);
# the lasso needs its own cross-validated lambda. Separate variable names are
# used so the ridge results stay available.
cv.lasso <- cv.glmnet(x[train, ], y[train], alpha = 1)
bestlam.lasso <- cv.lasso$lambda.min
lasso.pred <- predict(lasso.mod, s = bestlam.lasso, newx = x[test, ])
# Test-set mean squared error of the lasso predictions.
mean((lasso.pred - ytest)^2)

# First six coefficient rows at the selected penalty.
lasso.coef <- predict(lasso.mod, type = "coefficients",
                      s = bestlam.lasso)[1:6, ]